{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "id": "E95jHM7cqdRT"
   },
   "outputs": [],
   "source": [
    "#!pip install pytorch-lightning timm optuna imagehash"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "id": "NcTqEQ_7NFKq"
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import torch\n",
    "import optuna\n",
    "from optuna.integration import PyTorchLightningPruningCallback\n",
    "import pytorch_lightning as pl\n",
    "from sklearn.metrics import mean_squared_error\n",
    "import os\n",
    "import shutil\n",
    "\n",
    "from pet_finder_library import PetDataMining, PetDataLoader, PetFinderTransferModelBCEWithAddFeatures\n",
    "\n",
    "\n",
    "INPUT_DATA_PREFIX = '.'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "id": "Uw5_1TfITJrb"
   },
   "outputs": [],
   "source": [
    "#!unzip \"/content/drive/MyDrive/data_science/petfinder-pawpularity-score.zip\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "BSVa7vmriHBZ"
   },
   "source": [
    "# Оптимизация гиперпараметров модели\n",
    "\n",
    "Поиск оптимальных параметров осуществляется через фреймворк Optuna [1].\n",
    "\n",
    "Преимущества использования Optuna:\n",
    "\n",
    "- использует байесовские алгоритмы для оптимизации параметров;\n",
    "- возможность ранней остановки неперспективного эксперимента исходя из статистики (например, если результаты эксперимента ниже медианы из уже рассмотренных);\n",
    "- хорошая документация;\n",
    "- возможность хранения результатов в базе данных, восстановление поиска гиперпараметров из базы данных;\n",
    "- параллельный поиск параметров с разных устройств с помощью базы данных;\n",
    "- один из самых быстрых фреймворков оптимизации гиперпараметров (в 1.5 раза быстрее hyperopt) [2] .\n",
    "\n",
    "\n",
    "## Инициализация конфигурации модели"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "id": "O8nDPGh8TFsK"
   },
   "outputs": [],
   "source": [
    "SEED = 2022\n",
    "\n",
    "config = {'seed': SEED,\n",
    "          'get_optim_params': True,\n",
    "          'orig_train_csv': f'{INPUT_DATA_PREFIX}/train.csv',\n",
    "          'orig_test_csv': f'{INPUT_DATA_PREFIX}/test.csv',\n",
    "          'model': {\n",
    "              'model_name': 'swin_base_patch4_window7_224_in22k', \n",
    "              'output_dims': [1022, 1], \n",
    "              'use_weights': False, \n",
    "              'plot_epoch_loss': True,\n",
    "              'dropout': 0.3232967040113801, \n",
    "              'learning_rate': 4.812096281914617e-06, \n",
    "              'l2_regularization': 0.2766337962203344,\n",
    "              'adam_betas': (0.37438095497909607, 0.7504122450294624),\n",
    "              'pretrained': True,\n",
    "              'seed': SEED,\n",
    "              'full_trainable': True,\n",
    "              'use_mixup': True,\n",
    "              'mixup_alpha': 0.5, \n",
    "              'mixup_prob': 0.5,\n",
    "          },\n",
    "          'dataloader': {\n",
    "              'seed': SEED,\n",
    "              'n_splits': 5,\n",
    "              'size_dataset': None,\n",
    "              'use_kldiv': False,\n",
    "              'train_photo_dir': f'{INPUT_DATA_PREFIX}/train/',\n",
    "              'test_photo_dir': f'{INPUT_DATA_PREFIX}/test/',\n",
    "              'train_csv': \"files/mining_train.csv\",\n",
    "              'test_csv': \"files/mining_test.csv\",\n",
    "              'dataset_params': {\n",
    "                    'gaussian_sigma': 2,\n",
    "                    'pca_for_add_features': False,\n",
    "                    'std_for_add_features': True,\n",
    "                    'p_vflip': 0.5,\n",
    "                    'p_hflip': 0.5,\n",
    "                    'val_augmentation': False,\n",
    "                    'train_augmentation': True,\n",
    "                    'image_size': 224,\n",
    "                    'class_weights': None,\n",
    "                    'seed': SEED,\n",
    "               },\n",
    "              'train_loader_params': {\n",
    "                    'batch_size': 32,\n",
    "                    'shuffle': True,\n",
    "                    'num_workers': 2,\n",
    "                    'pin_memory': False,\n",
    "                    'drop_last': True,\n",
    "              },\n",
    "              'val_loader_params': {\n",
    "                    'batch_size': 32,\n",
    "                    'shuffle': False,\n",
    "                    'num_workers': 2,\n",
    "                    'pin_memory': False,\n",
    "                    'drop_last': False\n",
    "              },\n",
    "              'test_loader_params': {\n",
    "                    'batch_size': 1,\n",
    "                    'shuffle': False,\n",
    "                    'num_workers': 2,\n",
    "                    'pin_memory': False,\n",
    "                    'drop_last': False\n",
    "              }\n",
    "          },\n",
    "         'trainer': {\n",
    "              'max_epochs': 20,\n",
    "              'gpus': 1 if torch.cuda.is_available() else 0,\n",
    "              'progress_bar_refresh_rate': 10,\n",
    "              'resume_from_checkpoint': None,\n",
    "          },\n",
    "}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "BpJkxxMPi7Ki"
   },
   "source": [
    "Данные для обучение читаем из файла files/mining_train.csv, созданный в ноутбуке майнингу данных. Но тестовые требуется сгенерировать с помощью PetDataMining из оригинального датасета для тестирования в закрытой части соревнования. Так как этот датафрейм используется в классе PetDataLoader."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "m18WdnDyKAZv",
    "outputId": "2fa33ce1-eaa2-4ccd-d9b4-1d1843a1f4ca"
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using cache found in /root/.cache/torch/hub/ultralytics_yolov5_master\n",
      "YOLOv5 🚀 2022-1-20 torch 1.10.0+cu111 CPU\n",
      "\n",
      "Fusing layers... \n",
      "Model Summary: 574 layers, 140730220 parameters, 0 gradients\n",
      "Adding AutoShape... \n",
      "  0%|          | 0/8 [00:00<?, ?it/s]/usr/local/lib/python3.7/dist-packages/torch/autocast_mode.py:141: UserWarning:\n",
      "\n",
      "User provided device_type of 'cuda', but CUDA is not available. Disabling\n",
      "\n",
      " 12%|█▎        | 1/8 [00:05<00:36,  5.19s/it]/usr/local/lib/python3.7/dist-packages/torch/autocast_mode.py:141: UserWarning:\n",
      "\n",
      "User provided device_type of 'cuda', but CUDA is not available. Disabling\n",
      "\n",
      " 25%|██▌       | 2/8 [00:10<00:30,  5.16s/it]/usr/local/lib/python3.7/dist-packages/torch/autocast_mode.py:141: UserWarning:\n",
      "\n",
      "User provided device_type of 'cuda', but CUDA is not available. Disabling\n",
      "\n",
      " 38%|███▊      | 3/8 [00:15<00:25,  5.18s/it]/usr/local/lib/python3.7/dist-packages/torch/autocast_mode.py:141: UserWarning:\n",
      "\n",
      "User provided device_type of 'cuda', but CUDA is not available. Disabling\n",
      "\n",
      " 50%|█████     | 4/8 [00:20<00:20,  5.20s/it]/usr/local/lib/python3.7/dist-packages/torch/autocast_mode.py:141: UserWarning:\n",
      "\n",
      "User provided device_type of 'cuda', but CUDA is not available. Disabling\n",
      "\n",
      " 62%|██████▎   | 5/8 [00:25<00:15,  5.19s/it]/usr/local/lib/python3.7/dist-packages/torch/autocast_mode.py:141: UserWarning:\n",
      "\n",
      "User provided device_type of 'cuda', but CUDA is not available. Disabling\n",
      "\n",
      " 75%|███████▌  | 6/8 [00:31<00:10,  5.17s/it]/usr/local/lib/python3.7/dist-packages/torch/autocast_mode.py:141: UserWarning:\n",
      "\n",
      "User provided device_type of 'cuda', but CUDA is not available. Disabling\n",
      "\n",
      " 88%|████████▊ | 7/8 [00:36<00:05,  5.14s/it]/usr/local/lib/python3.7/dist-packages/torch/autocast_mode.py:141: UserWarning:\n",
      "\n",
      "User provided device_type of 'cuda', but CUDA is not available. Disabling\n",
      "\n",
      "100%|██████████| 8/8 [00:41<00:00,  5.16s/it]\n"
     ]
    }
   ],
   "source": [
    "data_miner = PetDataMining(petfinder_csv=config['orig_test_csv'], drop_duplicates=False, plot_duplicate=False,\n",
    "                 duplicate_thresh=0.9, plot_detector=False,\n",
    "                 image_filepath=config['dataloader']['test_photo_dir'])\n",
    "test_mining_df = data_miner.start()\n",
    "test_mining_df.to_csv(config['dataloader']['test_csv'], index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "7KzKu4GVjkTb"
   },
   "source": [
    "Фунция оптимизации для Optuna"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "id": "de6J-MOtxjQJ"
   },
   "outputs": [],
   "source": [
    "def objective(trial: optuna.trial.Trial, config=config, size_dataset=5000) -> float:\n",
    "    \"\"\" Функция оптимизации гиперпараметров модели\n",
    "\n",
    "    ВНИМАНИЕ: Функция изменяет словарь config.\n",
    "\n",
    "    Оптимизируются следующие параметры:\n",
    "    - количество слоев в полносвязной сети на выходе экстрактора признаков;\n",
    "    - количество нейронов в каждом слое полносвязной сети;\n",
    "    - значение dropout в полносвязной сети;\n",
    "    - скорость обучения learning_rate;\n",
    "    - регуляризация l2_regularization;\n",
    "    - значения beta1 и beta2 для AdamW оптимизатора;\n",
    "    - включение/отключение MixUp;\n",
    "    - включение/отключение аугментации;\n",
    "    - включение стандартизации дополнительных признаков std_for_add_features;\n",
    "    - включение преобразования ЗСФ для дополнительных признаков pca_for_add_features;\n",
    "\n",
    "    Параметры\n",
    "    ---------\n",
    "    trial : optuna.trial.Trial\n",
    "    config : dict\n",
    "      Словарь конфигурации модели\n",
    "    size_dataset : int\n",
    "      Размер датасета для обучения. По умолчанию 5000.\n",
    "    \"\"\"\n",
    "\n",
    "    # количество слоев в полносвязной сети на выходе экстрактора признаков\n",
    "    n_layers = trial.suggest_int(\"n_layers\", 0, 5)\n",
    "\n",
    "    # количество нейронов в каждом слое полносвязной сети;\n",
    "    output_dims = [\n",
    "        trial.suggest_int(\"n_units_l{}\".format(i), 16, 1024, log=True) for i in range(n_layers)\n",
    "    ]\n",
    "    output_dims.append(1)\n",
    "\n",
    "    # значение dropout в полносвязной сети;\n",
    "    dropout = trial.suggest_float(\"dropout\", 0.1, 0.7)\n",
    "    \n",
    "    # Можно подобрать тип модели, но выберем его константой\n",
    "    #model_name = trial.suggest_categorical('model_name', ['vgg16', 'resnet18', 'googlenet', 'alexnet'])\n",
    "    model_name = 'swin_tiny_patch4_window7_224'\n",
    "    \n",
    "    # скорость обучения learning_rate\n",
    "    learning_rate = trial.suggest_loguniform('learning_rate',  1e-6, 0.01)\n",
    "\n",
    "    # регуляризация l2_regularization\n",
    "    l2_regularization = trial.suggest_float('l2_regularization', 1e-6, 1.0)\n",
    "\n",
    "    # значения beta1 и beta2 для AdamW оптимизатора\n",
    "    adam_betas = (trial.suggest_float('adam_b1', 0.1, 0.999), trial.suggest_float('adam_b2', 0.1, 0.999))\n",
    "\n",
    "    # включение/отключение MixUp\n",
    "    use_mixup = trial.suggest_categorical('use_mixup', [True, False])\n",
    "\n",
    "    # включение стандартизации дополнительных признаков std_for_add_features\n",
    "    std_for_add_features = trial.suggest_categorical('std_for_add_features', [True, False])\n",
    "    config['dataloader']['dataset_params']['std_for_add_features'] = std_for_add_features\n",
    "    # включение преобразования ЗСФ для дополнительных признаков pca_for_add_features\n",
    "    pca_for_add_features = trial.suggest_categorical('pca_for_add_features', [True, False])\n",
    "    config['dataloader']['dataset_params']['pca_for_add_features'] = pca_for_add_features\n",
    "\n",
    "    plot_epoch_loss = False\n",
    "    full_trainable = True\n",
    "\n",
    "    use_weights = config['model']['use_weights']\n",
    "    mixup_alpha = config['model']['mixup_alpha']\n",
    "    mixup_prob = config['model']['mixup_prob']\n",
    "\n",
    "    model = PetFinderTransferModelBCEWithAddFeatures(model_name=model_name, dropout=dropout, output_dims=output_dims,\n",
    "                                                     learning_rate=learning_rate, l2_regularization=l2_regularization,\n",
    "                                                     adam_betas=adam_betas, use_weights=use_weights, plot_epoch_loss=plot_epoch_loss,\n",
    "                                                     full_trainable=full_trainable, use_mixup=use_mixup, mixup_alpha=mixup_alpha,\n",
    "                                                     mixup_prob=mixup_prob)\n",
    "\n",
    "    config['dataloader']['size_dataset'] = size_dataset\n",
    "    datamodule = PetDataLoader.create_kfold_loaders(**config['dataloader'])[0]\n",
    "\n",
    "    callbacks = [PyTorchLightningPruningCallback(trial, monitor=\"val_rmse\")]\n",
    "\n",
    "    trainer = pl.Trainer(\n",
    "        logger=True,\n",
    "        log_every_n_steps=10,\n",
    "        checkpoint_callback=False,\n",
    "        max_epochs=5,\n",
    "        gpus=1 if torch.cuda.is_available() else None,\n",
    "        callbacks=callbacks,\n",
    "    )\n",
    "\n",
    "    hyperparameters = dict(model_name=model_name, n_layers=n_layers, dropout=dropout, output_dims=output_dims, \n",
    "                           std_for_add_features=std_for_add_features, pca_for_add_features=pca_for_add_features,\n",
    "                           l2_regularization=l2_regularization, adam_b1=adam_betas[0], adam_beta2=adam_betas[1],\n",
    "                           learning_rate=learning_rate, use_weights=use_weights, full_trainable=full_trainable,\n",
    "                           use_mixup=use_mixup, mixup_alpha=mixup_alpha, mixup_prob=mixup_prob)\n",
    "    \n",
    "    trainer.logger.log_hyperparams(hyperparameters)\n",
    "    trainer.fit(model, datamodule=datamodule)\n",
    "\n",
    "    if len(model.train_history['val_rmse']) > 0:\n",
    "        best_val_rmse = np.min(model.train_history['val_rmse'])\n",
    "    else:\n",
    "        best_val_rmse = 99999\n",
    "\n",
    "    epoch_counts = len(model.train_history['val_rmse'])\n",
    "\n",
    "    return best_val_rmse\n",
    "\n",
    "def print_best_trial(study):\n",
    "    print(\"Number of finished trials: {}\".format(len(study.trials)))\n",
    "\n",
    "    print(\"Best trial:\")\n",
    "    trial = study.best_trial\n",
    "\n",
    "    print(\"  Number: {}\".format(trial.number))\n",
    "    print(\"  Value: {}\".format(trial.value))\n",
    "\n",
    "    print(\"  Params: \")\n",
    "    for key, value in trial.params.items():\n",
    "        print(\"    {}: {}\".format(key, value))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "kQ3wE2SfjBZi"
   },
   "source": [
    "## Цикл оптимизации гиперпараметров\n",
    "\n",
    "Создаем обучение с помощью optuna.create_study(). Указываем имя и путь к базе данных. А также указываем в параметре direction, что нам необходимо минимизировать функцию."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "7C4_xnmwkoyE"
   },
   "outputs": [],
   "source": [
    "if config['get_optim_params']:\n",
    "    \n",
    "    pruner = optuna.pruners.MedianPruner()\n",
    "\n",
    "    study = optuna.create_study(study_name=\"swin_tiny_patch4_window7_224\", direction=\"minimize\", \n",
    "                                pruner=pruner, load_if_exists=True,\n",
    "                                storage=\"sqlite:////optuna.db\")\n",
    "\n",
    "    if len(study.best_trials) > 0:\n",
    "        print_best_trial(study)\n",
    "\n",
    "    study.optimize(objective, n_trials=1000)\n",
    "\n",
    "    print_best_trial(study)"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "collapsed_sections": [],
   "machine_shape": "hm",
   "name": "PetFinder Оптимизация гиперпараметров.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
